# JUDICIAL PROFESSIONAL BACKGROUND AND PRETRIAL DETENTION OUTCOMES
# Oded Oren and Chad Topaz
library("this.path")
source(paste0(dirname(this.path()), "/functions.R"))

# Data Loading -----------------------------------------------------------------
# Load judge biographical data and case-level arraignment data from the
# `data/` folder next to this script. `detailed_experience` is dropped from
# `judgeinfo` here; sections that need it read it from the RDS file again.
judgeinfo <- file.path(dirname(this.path()), "data", "judgeinfo.RDS") |>
  readRDS() |>
  select(-detailed_experience)
detentiondata <- readRDS(
  file.path(dirname(this.path()), "data", "detentiondata.RDS")
)
baildata <- readRDS(
  file.path(dirname(this.path()), "data", "baildata.RDS")
)


# Models -------------------------------------------------------------

## Quasi Random Testing — Table 4-----

# Case-level detention data with each judge's `detailed_experience` attached.
# The column is read straight from the RDS file because `judgeinfo` was loaded
# without it.
data_quasi <- left_join(
  detentiondata,
  readRDS(file.path(dirname(this.path()), "data", "judgeinfo.RDS")) |>
    select(judge, detailed_experience),
  by = "judge"
)

# All defendant and case characteristics used as predictors, collapsed into
# the right-hand side of a formula string
wald_vars <- paste(
  c(
    "age", "gender_defendant", "supervision",
    "topseverity", "chargecategory", "offenselevel",
    "misd.convictions", "nvfo.convictions", "vfo.convictions",
    "vfo.pending", "nvfo.pending", "misd.pending"
  ),
  collapse = " + "
)
# Fixed effects control for arraignment date, county, and arrest type
# (combined into a single fixed effect with fixest's `^` operator)
fixed_effects <- "arresttype^arraigndate^county"

# Each model regresses one judge-background indicator on the defendant/case
# characteristics in `wald_vars`, absorbing `fixed_effects`, with standard
# errors clustered by judge; `wald()` is then called on the fit.
# The data frame is piped into feols(), so it arrives as the first argument.

# Q1: `law_enforcement` indicator
Q1 <- mutate(data_quasi, law_enforcement = as.integer(law_enforcement)) %>%
  feols(
    as.formula(paste0("law_enforcement ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q1)

# Q2: `legal_services` indicator
Q2 <- mutate(data_quasi, legal_services = as.integer(legal_services)) %>%
  feols(
    as.formula(paste0("legal_services ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q2)

# Q3: experience category "Both"
Q3 <- mutate(data_quasi, both = as.integer(experience == "Both")) %>%
  feols(
    as.formula(paste0("both ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q3)

# Q4: experience category "None"
Q4 <- mutate(data_quasi, none = as.integer(experience == "None")) %>%
  feols(
    as.formula(paste0("none ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q4)

# Q5: detailed experience "Prosecutor"
Q5 <- mutate(
  data_quasi,
  prosecutor = as.integer(detailed_experience == "Prosecutor")
) %>%
  feols(
    as.formula(paste0("prosecutor ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q5)

# Q6: detailed experience "AUSA"
Q6 <- mutate(data_quasi, AUSA = as.integer(detailed_experience == "AUSA")) %>%
  feols(
    as.formula(paste0("AUSA ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q6)

# Q7: detailed experience "Police"
Q7 <- mutate(
  data_quasi,
  police = as.integer(detailed_experience == "Police")
) %>%
  feols(
    as.formula(paste0("police ~ ", wald_vars, " | ", fixed_effects)),
    cluster = ~judge
  )
wald(Q7)

## Main Models — Experience vs None — Table 6----

### Detention Model — Table 6 --------------------------------------------------
# OLS (feols) of `detained` on every column of `detentiondata` except the
# outcome, the judge identifier, and the `law_enforcement` / `legal_services`
# columns, so `experience` stays among the regressors. Standard errors are
# clustered by judge.
vars_detention <- names(detentiondata) |>
  setdiff(c("detained", "judge", "law_enforcement", "legal_services"))

model_detention <- feols(
  reformulate(vars_detention, response = "detained"),
  data = detentiondata,
  cluster = ~judge
)
summary(model_detention)

### Bail Extensive Model — Table 6 --------------------------------------------
# Extensive margin: `bail_set` flags cases with a positive cash bail amount.
data_extensive <- mutate(baildata, bail_set = cash.bail > 0)

# Regressors: every column except the outcome, the raw bail amount, the judge
# identifier, and the `law_enforcement` / `legal_services` columns.
vars_extensive <- names(data_extensive) |>
  setdiff(
    c("bail_set", "cash.bail", "judge", "law_enforcement", "legal_services")
  )

model_extensive <- feols(
  reformulate(vars_extensive, response = "bail_set"),
  data = data_extensive,
  cluster = ~judge
)

summary(model_extensive)

### Bail Intensive Model — Table 6 --------------------------------------------
# Intensive margin: log cash bail among cases with a positive bail amount.
data_intensive <- baildata |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail))

# `.` brings in every remaining column; the judge identifier, the raw bail
# amount, and the `law_enforcement` / `legal_services` columns are removed.
model_intensive <- lm(
  log_bail ~ . - judge - cash.bail - law_enforcement - legal_services - log_bail,
  data = data_intensive
)

# Judge-clustered covariance matrix (type "HC1").
# NOTE(review): `data` is not a named vcovCL() argument and is presumably
# swallowed by `...` — confirm before removing it.
vcov_intensive <- vcovCL(
  model_intensive,
  data = data_intensive,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive, vcov = vcov_intensive))

## Law Enforcement vs. All Others — Table 7 ----
### Detention — Table 7 (LE vs All) -------------------------------------------
# Same specification as the Table 6 detention model, except that `experience`
# and `legal_services` are dropped and `law_enforcement` is kept as a regressor.
vars_detention_LE <- names(detentiondata) |>
  setdiff(c("detained", "judge", "experience", "legal_services"))

model_detention_LE <- feols(
  reformulate(vars_detention_LE, response = "detained"),
  data = detentiondata,
  cluster = ~judge
)
summary(model_detention_LE)

### Bail Extensive — Table 7 (LE vs All) --------------------------------------
# `bail_set` flags cases with a positive cash bail amount.
data_extensive_LE <- mutate(baildata, bail_set = cash.bail > 0)

# `experience` and `legal_services` are dropped; `law_enforcement` is kept.
vars_extensive_LE <- names(data_extensive_LE) |>
  setdiff(c("bail_set", "cash.bail", "judge", "experience", "legal_services"))

model_extensive_LE <- feols(
  reformulate(vars_extensive_LE, response = "bail_set"),
  data = data_extensive_LE,
  cluster = ~judge
)

summary(model_extensive_LE)

### Bail Intensive — Table 7 (LE vs All) --------------------------------------
# Log cash bail among cases with a positive bail amount.
data_intensive_LE <- baildata |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail))

# `experience` and `legal_services` are removed; `law_enforcement` stays in.
model_intensive_LE <- lm(
  log_bail ~ . - judge - cash.bail - experience - legal_services - log_bail,
  data = data_intensive_LE
)

# Judge-clustered covariance matrix (type "HC1")
vcov_intensive_LE <- vcovCL(
  model_intensive_LE,
  data = data_intensive_LE,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive_LE, vcov = vcov_intensive_LE))

## Legal Services vs. All Others — Table 7 ----
### Detention — Table 7 (LS vs All) -------------------------------------------
# Same specification as the Table 6 detention model, except that `experience`
# and `law_enforcement` are dropped and `legal_services` is kept as a regressor.
vars_detention_LS <- names(detentiondata) |>
  setdiff(c("detained", "judge", "experience", "law_enforcement"))

model_detention_LS <- feols(
  reformulate(vars_detention_LS, response = "detained"),
  data = detentiondata,
  cluster = ~judge
)
summary(model_detention_LS)


### Bail Extensive — Table 7 (LS vs All) --------------------------------------
# `bail_set` flags cases with a positive cash bail amount.
data_extensive_LS <- mutate(baildata, bail_set = cash.bail > 0)

# `experience` and `law_enforcement` are dropped; `legal_services` is kept.
vars_extensive_LS <- names(data_extensive_LS) |>
  setdiff(c("bail_set", "cash.bail", "judge", "experience", "law_enforcement"))

model_extensive_LS <- feols(
  reformulate(vars_extensive_LS, response = "bail_set"),
  data = data_extensive_LS,
  cluster = ~judge
)

summary(model_extensive_LS)

### Bail Intensive — Table 7 (LS vs All) --------------------------------------
# Log cash bail among cases with a positive bail amount.
data_intensive_LS <- baildata |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail))

# `experience` and `law_enforcement` are removed; `legal_services` stays in.
model_intensive_LS <- lm(
  log_bail ~ . - judge - cash.bail - experience - law_enforcement - log_bail,
  data = data_intensive_LS
)

# Judge-clustered covariance matrix (type "HC1")
vcov_intensive_LS <- vcovCL(
  model_intensive_LS,
  data = data_intensive_LS,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive_LS, vcov = vcov_intensive_LS))

## Law enforcement tenure analysis — Table 9----
### Detention ------------------------------------------------------------------
# Restrict to cases heard by `law_enforcement` judges with a non-missing
# `years_prosecutor`, attach the two tenure columns from `judgeinfo`, and drop
# the background columns.
data_detention_yrs <- detentiondata |>
  filter(law_enforcement) |>
  left_join(
    select(judgeinfo, judge, years_prosecutor, pros_year_group),
    by = "judge"
  ) |>
  filter(!is.na(years_prosecutor)) |>
  select(-c(law_enforcement, experience, legal_services))

# Specification 1 uses `years_prosecutor`; specification 2 uses
# `pros_year_group` instead.
vars_detention_yrs1 <- names(data_detention_yrs) |>
  setdiff(c("detained", "judge", "pros_year_group"))
vars_detention_yrs2 <- names(data_detention_yrs) |>
  setdiff(c("detained", "judge", "years_prosecutor"))

model_detention_yrs1 <- feols(
  reformulate(vars_detention_yrs1, response = "detained"),
  data = data_detention_yrs,
  cluster = ~judge
)
model_detention_yrs2 <- feols(
  reformulate(vars_detention_yrs2, response = "detained"),
  data = data_detention_yrs,
  cluster = ~judge
)

summary(model_detention_yrs1)
summary(model_detention_yrs2)

### Bail Extensive -------------------------------------------------------------
# Cases heard by `law_enforcement` judges with a non-missing
# `years_prosecutor`; `bail_set` flags a positive cash bail amount.
data_extensive_yrs <- baildata |>
  filter(law_enforcement) |>
  left_join(
    select(judgeinfo, judge, years_prosecutor, pros_year_group),
    by = "judge"
  ) |>
  filter(!is.na(years_prosecutor)) |>
  mutate(bail_set = cash.bail > 0) |>
  select(-c(law_enforcement, cash.bail, experience, legal_services))

# Specification 1 uses `years_prosecutor`; specification 2 uses
# `pros_year_group` instead.
vars_extensive_yrs1 <- names(data_extensive_yrs) |>
  setdiff(c("bail_set", "judge", "pros_year_group"))
vars_extensive_yrs2 <- names(data_extensive_yrs) |>
  setdiff(c("bail_set", "judge", "years_prosecutor"))

model_extensive_yrs1 <- feols(
  reformulate(vars_extensive_yrs1, response = "bail_set"),
  data = data_extensive_yrs,
  cluster = ~judge
)

model_extensive_yrs2 <- feols(
  reformulate(vars_extensive_yrs2, response = "bail_set"),
  data = data_extensive_yrs,
  cluster = ~judge
)

summary(model_extensive_yrs1)
summary(model_extensive_yrs2)

### Bail Intensive -------------------------------------------------------------
# Cases heard by `law_enforcement` judges with a non-missing
# `years_prosecutor` and a positive cash bail amount; outcome is log bail.
data_intensive_yrs <- baildata |>
  filter(law_enforcement) |>
  left_join(
    select(judgeinfo, judge, years_prosecutor, pros_year_group),
    by = "judge"
  ) |>
  filter(!is.na(years_prosecutor)) |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail)) |>
  select(-c(law_enforcement, cash.bail, legal_services, experience))

# Specification 1 keeps `years_prosecutor` (drops `pros_year_group`)
model_intensive_yrs1 <- lm(
  log_bail ~ . - judge - log_bail - pros_year_group,
  data = data_intensive_yrs
)

# Specification 2 keeps `pros_year_group` (drops `years_prosecutor`)
model_intensive_yrs2 <- lm(
  log_bail ~ . - judge - log_bail - years_prosecutor,
  data = data_intensive_yrs
)

# Judge-clustered covariance matrices (type "HC1")
vcov_intensive_yrs1 <- vcovCL(
  model_intensive_yrs1,
  data = data_intensive_yrs,
  cluster = ~judge,
  type = "HC1"
)
vcov_intensive_yrs2 <- vcovCL(
  model_intensive_yrs2,
  data = data_intensive_yrs,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive_yrs1, vcov = vcov_intensive_yrs1))
print(coeftest(model_intensive_yrs2, vcov = vcov_intensive_yrs2))

## Detailed Experience Categories — Table 8-----
### Detention ------------------------------------------------------------------
# Swap the `experience` column for `detailed_experience` (read from the judge
# RDS file) and drop factor levels that do not occur in the joined data.
detentiondata_detailed <- detentiondata |>
  select(-experience) |>
  left_join(
    readRDS(file.path(dirname(this.path()), "data", "judgeinfo.RDS")) |>
      select(judge, detailed_experience),
    by = "judge"
  ) |>
  mutate(detailed_experience = forcats::fct_drop(detailed_experience))

vars_detention_detailed <- names(detentiondata_detailed) |>
  setdiff(c("detained", "judge", "law_enforcement", "legal_services"))

model_detention_detailed <- feols(
  reformulate(vars_detention_detailed, response = "detained"),
  data = detentiondata_detailed,
  cluster = ~judge
)

summary(model_detention_detailed)

### Bail Extensive -------------------------------------------------------------
# `bail_set` flags a positive cash bail amount; `experience` is replaced by
# `detailed_experience` (read from the judge RDS file), with unused factor
# levels dropped.
data_extensive_detailed <- baildata |>
  mutate(bail_set = cash.bail > 0) |>
  select(-experience) |>
  left_join(
    readRDS(file.path(dirname(this.path()), "data", "judgeinfo.RDS")) |>
      select(judge, detailed_experience),
    by = "judge"
  ) |>
  mutate(detailed_experience = forcats::fct_drop(detailed_experience))

vars_extensive_detailed <- names(data_extensive_detailed) |>
  setdiff(
    c("bail_set", "cash.bail", "judge", "law_enforcement", "legal_services")
  )

model_extensive_detailed <- feols(
  reformulate(vars_extensive_detailed, response = "bail_set"),
  data = data_extensive_detailed,
  cluster = ~judge
)

summary(model_extensive_detailed)

### Bail Intensive -------------------------------------------------------------
# Log cash bail among cases with a positive bail amount; `experience` is
# replaced by `detailed_experience` (read from the judge RDS file), with
# unused factor levels dropped.
data_intensive_detailed <- baildata |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail)) |>
  select(-experience) |>
  left_join(
    readRDS(file.path(dirname(this.path()), "data", "judgeinfo.RDS")) |>
      select(judge, detailed_experience),
    by = "judge"
  ) |>
  mutate(detailed_experience = forcats::fct_drop(detailed_experience))

model_intensive_detailed <- lm(
  log_bail ~ . - judge - cash.bail - law_enforcement - legal_services - log_bail,
  data = data_intensive_detailed
)

# Judge-clustered covariance matrix (type "HC1")
vcov_intensive_detailed <- vcovCL(
  model_intensive_detailed,
  data = data_intensive_detailed,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive_detailed, vcov = vcov_intensive_detailed))

## Collapse 'Both' into Law Enforcement — Table A3------
### Detention ------------------------------------------------------------------
# Recode judges in the "Both" category as "Law Enforcement", then re-fit the
# detention model with "None" as the reference level.
# `experience` is coerced with as.character() before recoding so that both
# case_when() branches have the same type. Without it, a factor `experience`
# only works through the type coercion added in dplyr 1.1 and errors on older
# versions. The result is unchanged wherever the original recode ran.
detentiondata_both <- detentiondata |>
  mutate(
    experience = case_when(
      experience == "Both" ~ "Law Enforcement",
      TRUE ~ as.character(experience)
    ),
    experience = relevel(as.factor(experience), ref = "None")
  )

# Regressors: every column except the outcome, the judge identifier, and the
# `law_enforcement` / `legal_services` columns.
vars_detention_both <- setdiff(
  names(detentiondata_both),
  c("detained", "judge", "law_enforcement", "legal_services")
)

model_detention_both <- feols(
  as.formula(paste("detained ~", paste(vars_detention_both, collapse = " + "))),
  data = detentiondata_both,
  cluster = ~judge
)
summary(model_detention_both)

### Bail Extensive -------------------------------------------------------------
# `bail_set` flags a positive cash bail amount. Judges in the "Both" category
# are recoded as "Law Enforcement", with "None" as the reference level.
# `experience` is coerced with as.character() before recoding so that both
# case_when() branches have the same type. Without it, a factor `experience`
# only works through the type coercion added in dplyr 1.1 and errors on older
# versions. The result is unchanged wherever the original recode ran.
data_extensive_both <- baildata |>
  mutate(bail_set = cash.bail > 0) |>
  mutate(
    experience = case_when(
      experience == "Both" ~ "Law Enforcement",
      TRUE ~ as.character(experience)
    ),
    experience = relevel(as.factor(experience), ref = "None")
  )

# Regressors: every column except the outcome, the raw bail amount, the judge
# identifier, and the `law_enforcement` / `legal_services` columns.
vars_extensive_both <- setdiff(
  names(data_extensive_both),
  c("bail_set", "cash.bail", "judge", "law_enforcement", "legal_services")
)

model_extensive_both <- feols(
  as.formula(paste("bail_set ~", paste(vars_extensive_both, collapse = " + "))),
  data = data_extensive_both,
  cluster = ~judge
)

summary(model_extensive_both)

### Bail Intensive -------------------------------------------------------------
# Log cash bail among cases with a positive bail amount. Judges in the "Both"
# category are recoded as "Law Enforcement", with "None" as the reference level.
# `experience` is coerced with as.character() before recoding so that both
# case_when() branches have the same type. Without it, a factor `experience`
# only works through the type coercion added in dplyr 1.1 and errors on older
# versions. The result is unchanged wherever the original recode ran.
data_intensive_both <- baildata |>
  filter(cash.bail > 0) |>
  mutate(log_bail = log(cash.bail)) |>
  mutate(
    experience = case_when(
      experience == "Both" ~ "Law Enforcement",
      TRUE ~ as.character(experience)
    ),
    experience = relevel(as.factor(experience), ref = "None")
  )

model_intensive_both <- lm(
  log_bail ~ . - judge - cash.bail - law_enforcement - legal_services - log_bail,
  data = data_intensive_both
)

# Judge-clustered covariance matrix (type "HC1")
vcov_intensive_both <- vcovCL(
  model_intensive_both,
  data = data_intensive_both,
  cluster = ~judge,
  type = "HC1"
)

print(coeftest(model_intensive_both, vcov = vcov_intensive_both))

## Policy Impact Calculations -----
### Detention Impact -----------------------------------------------------------
# Per judge: number of cases in `detentiondata` and the value returned by
# get_judge_years(), which is not defined in this script (presumably the
# number of years the judge appears in the data — confirm in functions.R).
datayears_d <- detentiondata |>
  count(judge, name = "cases") |>
  mutate(
    datayears = purrr::map_dbl(judge, \(j) get_judge_years(detentiondata, j))
  )

# Mean cases per year across judges flagged `law_enforcement`
avgcasesperyear_d <- detentiondata |>
  left_join(datayears_d, by = "judge") |>
  mutate(casesperyear = cases / datayears) |>
  filter(law_enforcement == TRUE) |>
  distinct(judge, casesperyear) |>
  pull(casesperyear) |>
  mean()

# "Law Enforcement" coefficient from the Table 6 detention model
le_effect_detention <-
  coef(summary(model_detention))["experienceLaw Enforcement"][[1]]
# Coefficient scaled by the average annual caseload and a factor of 10
le_detentions_10yr <- le_effect_detention * avgcasesperyear_d * 10

avgcasesperyear_d
le_effect_detention
le_detentions_10yr
# NOTE(review): 97, 365 and 1375 are unexplained constants — presumably an
# average detention length in days, days per year, and a dollar cost; confirm
# against the paper before reuse.
le_detentions_10yr * 97 / 365
le_detentions_10yr * 1375 * 97

### Bail Amount Impact ---------------------------------------------------------
# Per judge: number of positive-bail cases and the value returned by
# get_judge_years(), which is not defined in this script (presumably years in
# the data — confirm in functions.R).
datayears_int <- data_intensive |>
  count(judge, name = "cases") |>
  mutate(
    datayears = purrr::map_dbl(judge, \(j) get_judge_years(data_intensive, j))
  )

# Mean positive-bail cases per year across judges flagged `law_enforcement`
avgcasesperyear_int <- data_intensive |>
  left_join(datayears_int, by = "judge") |>
  mutate(casesperyear = cases / datayears) |>
  filter(law_enforcement == TRUE) |>
  distinct(judge, casesperyear) |>
  pull(casesperyear) |>
  mean()

# "Law Enforcement" coefficient from the log-bail model, converted to a
# proportional change via exp(b) - 1
le_effect_intensive <- coef(
  coeftest(model_intensive, vcov = vcov_intensive)
)["experienceLaw Enforcement"][[1]]
delta_pct <- exp(le_effect_intensive) - 1
delta_pct

# Mean cash bail among positive-bail cases whose `experience` is "None"
mean_bail_none <- with(data_intensive, mean(cash.bail[experience == "None"]))
mean_bail_none

extra_dollars_per_case <- delta_pct * mean_bail_none
extra_dollars_per_case

extra_dollars_per_10_years <- extra_dollars_per_case * avgcasesperyear_int * 10
extra_dollars_per_10_years

# Generate Tables and Figures-------
# The table_*() and fig_*() helpers are not defined in this script
# (presumably they come from functions.R).

## Table 1–2 ---------------------------------------------------------------
table_1_2(detentiondata, "table1.tex")
table_1_2(baildata, "table2.tex")

## Table 3 -----------------------------------------------------------------
table_3(detentiondata, baildata)

## Table 4 -----------------------------------------------------------------
table_4(Q1, Q2, Q3, Q4, Q5, Q6, Q7)

## Table 5 -----------------------------------------------------------------
table_5(detentiondata, baildata)

## Table 6 -----------------------------------------------------------------
table_6(
  model_detention,
  model_extensive,
  model_intensive,
  vcov_intensive
)

## Table 7 -----------------------------------------------------------------
table_7(
  # Law enforcement vs. all others
  model_detention_LE, model_extensive_LE,
  model_intensive_LE, vcov_intensive_LE,
  # Legal services vs. all others
  model_detention_LS, model_extensive_LS,
  model_intensive_LS, vcov_intensive_LS
)

## Table 8 -----------------------------------------------------------------
table_8(
  model_detention_detailed,
  model_extensive_detailed,
  model_intensive_detailed,
  vcov_intensive_detailed
)

## Table 9 -----------------------------------------------------------------
table_9(
  model_detention_yrs1, model_detention_yrs2,
  model_extensive_yrs1, model_extensive_yrs2,
  model_intensive_yrs1, model_intensive_yrs2,
  vcov_intensive_yrs1, vcov_intensive_yrs2
)

## Appendix Table A2 -------------------------------------------------------
table_A2(
  model_detention,
  model_extensive,
  model_intensive,
  vcov_intensive
)

## Appendix Table A3 -------------------------------------------------------
table_A3(
  model_detention_both,
  model_extensive_both,
  model_intensive_both,
  vcov_intensive_both
)

## Figure 1 ----------------------------------------------------------------
fig_1(detentiondata)

## Figure A1 ---------------------------------------------------------------
fig_A1(baildata)
